import numpy as np
import pandas as pd
## Reading the volumetric data and discarding its 'Unnamed: 0' column
df = pd.read_csv(
    "https://raw.githubusercontent.com/bcaffo/ds4bme_intro/master/data/kirby21.csv"
).drop(columns=['Unnamed: 0'])
## Reading the tab-separated hierarchy table; the 'Level5' column is not used
url = "https://raw.githubusercontent.com/bcaffo/MRIcloudT1volumetrics/master/inst/extdata/multilevel_lookup_table.txt"
multilevel_lookup = pd.read_csv(url, sep="\t").drop(columns=['Level5'])
## Giving the "modify*" columns the names used for the hierarchy levels below
multilevel_lookup = multilevel_lookup.rename(
    columns={
        "modify": "roi",
        "modify.1": "level4",
        "modify.2": "level3",
        "modify.3": "level2",
        "modify.4": "level1",
    }
)
## Keeping only the renamed columns, ordered from the ROI up to level 1
multilevel_lookup = multilevel_lookup[['roi', 'level4', 'level3', 'level2', 'level1']]
## Stripping surrounding whitespace from the level 2 labels
multilevel_lookup['level2'] = multilevel_lookup['level2'].str.strip()
## Subject to display. This must be set before the first filter below:
## previously the filter ran before the assignment, so the name resolved to
## the builtin function `id` and the comparison matched no rows.
subject_id = 127
## Rows of type 1 belonging to the chosen subject (all levels)
is_subject = (df.type == 1) & (df.id == subject_id)
## Making a list of ROIs to use for labels, and for indexing later.
## 'ICV' is placed first because it is the level0 root assigned below.
rois = ['ICV'] + df.loc[is_subject, 'roi'].unique().tolist()
# print(rois)
## Loading subject data: level 5 rows only, keeping the ROI name and volume
df = df.loc[is_subject & (df.level == 5), ['roi', 'volume']]
## Merge the subject data with the multilevel data (inner join on the ROI name)
df = pd.merge(df, multilevel_lookup, on="roi")
## Every ROI shares the same root node
df = df.assign(level0="ICV")
## Composition: each ROI's volume as a fraction of the summed volume
df = df.assign(comp=df.volume / np.sum(df.volume))
## Prepping the data for the sankey diagram format
def _level_links(frame, child, parent):
    """Return one row per (child, parent) pair with the summed composition.

    Only the 'comp' column is aggregated, so the text columns of `frame`
    (ROI names and the other level labels) are never summed. The result has
    the columns 'target' (the child label), 'source' (the parent label) and
    'comp', in that order.
    """
    links = frame.groupby([child, parent], as_index=False)['comp'].sum()
    return links.rename(columns={child: 'target', parent: 'source'})


## Level 0 to Level 1
l1 = _level_links(df, 'level1', 'level0')
## Level 1 to Level 2
l2 = _level_links(df, 'level2', 'level1')
## Level 2 to Level 3
l3 = _level_links(df, 'level3', 'level2')
## Level 3 to Level 4
## NOTE(review): l4 is built but was never part of the concatenation below;
## that is kept as-is. Confirm whether the level 4 links should be plotted.
l4 = _level_links(df, 'level4', 'level3')
## Concatenating the sankey datasets
sankey_df = pd.concat([l1, l2, l3])
## Adding a list of integers to index the regions instead of their names.
## rois.index raises ValueError when a label is missing from rois.
sankey_df['target_idx'] = [rois.index(x) for x in sankey_df['target']]
sankey_df['source_idx'] = [rois.index(x) for x in sankey_df['source']]
## Building the sankey figure from the label list and the link table
import plotly.graph_objects as go

## Node appearance; labels are looked up by position in rois
sankey_nodes = {
    "pad": 15,
    "thickness": 20,
    "line": {"color": "black", "width": 0.5},
    "label": rois,
    "color": "blue",
}
## Links: source/target are integer positions into the node labels,
## and value is the composition carried by each link
sankey_links = {
    "source": sankey_df['source_idx'],
    "target": sankey_df['target_idx'],
    "value": sankey_df['comp'],
}
fig = go.Figure(data=[go.Sankey(node=sankey_nodes, link=sankey_links)])
fig.update_layout(
    title_text="Brain composition - Sankey Diagram",
    height=1200,
    font_size=12,
)
fig.show()